In [1]:
# import libraries 

from bs4 import BeautifulSoup
import requests
import time
import datetime

import smtplib
In [2]:
# Connect to the website
URL = 'https://www.amazon.com/Stick-Figure-Friendship-Sarcastic-Funny/dp/B076FTC6FQ/ref=pd_di_sccai_1/130-0859633-0246240?pd_rd_w=H6E0a&pf_rd_p=c9443270-b914-4430-a90b-72e3e7e784e0&pf_rd_r=3VATEZ8E3TYFF3QSXESN&pd_rd_r=7b765e13-638c-445f-a53b-a2518959fe33&pd_rd_wg=L5D2O&pd_rd_i=B076FTC6FQ&psc=1'

# Browser-like request headers sent with the page request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9",
    "DNT": "1",
    "Connection": "close",
    "Upgrade-Insecure-Requests": "1",
}

# Without a timeout requests.get() can block forever on an unresponsive server;
# raise_for_status() stops here on an HTTP error instead of parsing an error page.
page = requests.get(URL, headers=headers, timeout=30)
page.raise_for_status()

# Parse once, then re-parse the prettified markup (kept so the extracted text
# has the same layout as before).
soup1 = BeautifulSoup(page.content, "html.parser")
soup2 = BeautifulSoup(soup1.prettify(), 'html.parser')

# find() returns None when the id is absent; fail with a message that says
# which element is missing rather than an AttributeError on None.
title_tag = soup2.find(id='productTitle')
price_tag = soup2.find(id='priceblock_ourprice')
if title_tag is None or price_tag is None:
    missing = [
        element_id
        for element_id, tag in (('productTitle', title_tag), ('priceblock_ourprice', price_tag))
        if tag is None
    ]
    raise ValueError("Element(s) not found in the downloaded page: " + ", ".join(missing))

title = title_tag.get_text()
price = price_tag.get_text()

print(title)
print(price)
                   I Got Your Back Stick Figure Friendship Novelty Sarcasm Teens Funny T Shirt
                  

                    $16.99
                   
In [3]:
# Clean up the data a little bit

title = title.strip()
# Remove the leading dollar sign with lstrip('$') instead of slicing off the
# first character: slicing drops a digit every time this cell is re-run on an
# already-cleaned price, whereas lstrip is a no-op the second time.
price = price.strip().lstrip('$')

print(title)
print(price)
I Got Your Back Stick Figure Friendship Novelty Sarcasm Teens Funny T Shirt
16.99
In [4]:
# Record the calendar date on which the price was observed.
import datetime

today = datetime.date.today()

# A date prints in ISO form (YYYY-MM-DD).
print(today.isoformat())
2021-09-14
In [5]:
import csv

# Column names, followed by the single observation collected so far.
header = ['Title', 'price', 'Date']
data = [title, price, today]

# Mode 'w' creates the file (replacing any existing copy); newline='' is what
# the csv module expects so it can control line endings itself.
with open('AmazonWebScraperDataset.csv', 'w', newline='', encoding='UTF8') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerows([header, data])
In [6]:
import pandas as pd

# Read the CSV back from the same relative path it is written to. The previous
# absolute path under one user's home directory only worked on that machine
# and only when the notebook happened to run from that folder.
df = pd.read_csv('AmazonWebScraperDataset.csv')

print(df)
                                               Title  price        Date
0  I Got Your Back Stick Figure Friendship Novelt...  16.99  2021-09-14
In [7]:
# Now we are appending data to the CSV

# Mode 'a+' writes at the end of the existing file, so the header and earlier
# rows stay in place and this row is added after them.
with open('AmazonWebScraperDataset.csv', 'a+', newline='', encoding='UTF8') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(data)
In [8]:
def check_price(url=None, csv_path='AmazonWebScraperDataset.csv', timeout=30):
    """Scrape the product title and price and append one dated row to a CSV.

    Each call downloads the product page, extracts the text of the elements
    with ids ``productTitle`` and ``priceblock_ourprice``, and appends
    ``[title, price, today]`` to ``csv_path``. The header row is written only
    when the file does not exist yet or is empty, so repeated calls build up a
    price history instead of overwriting the previous observation.

    Parameters
    ----------
    url : str, optional
        Product page to fetch. Defaults to the T-shirt page used throughout
        this notebook.
    csv_path : str, optional
        CSV file that receives the new row.
    timeout : float, optional
        Seconds to wait for the server before giving up.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    ValueError
        If the title or price element is not present in the returned page.
    """
    import csv
    import os

    if url is None:
        url = 'https://www.amazon.com/Stick-Figure-Friendship-Sarcastic-Funny/dp/B076FTC6FQ/ref=pd_di_sccai_1/130-0859633-0246240?pd_rd_w=H6E0a&pf_rd_p=c9443270-b914-4430-a90b-72e3e7e784e0&pf_rd_r=3VATEZ8E3TYFF3QSXESN&pd_rd_r=7b765e13-638c-445f-a53b-a2518959fe33&pd_rd_wg=L5D2O&pd_rd_i=B076FTC6FQ&psc=1'
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36", "Accept-Encoding":"gzip, deflate, br", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9", "DNT":"1","Connection":"close", "Upgrade-Insecure-Requests":"1"}

    # A timeout is essential when this runs unattended in a polling loop:
    # without one a single stalled connection blocks the loop forever.
    page = requests.get(url, headers=headers, timeout=timeout)
    page.raise_for_status()

    # Re-parsing the prettified markup is kept from the original so the
    # extracted text is laid out the same way before stripping.
    soup1 = BeautifulSoup(page.content, "html.parser")
    soup2 = BeautifulSoup(soup1.prettify(), 'html.parser')

    # find() returns None when an id is absent; report which one is missing
    # instead of failing with an AttributeError on None.
    title_tag = soup2.find(id='productTitle')
    price_tag = soup2.find(id='priceblock_ourprice')
    if title_tag is None or price_tag is None:
        missing = [
            element_id
            for element_id, tag in (('productTitle', title_tag), ('priceblock_ourprice', price_tag))
            if tag is None
        ]
        raise ValueError("Element(s) not found in the downloaded page: " + ", ".join(missing))

    title = title_tag.get_text().strip()
    # Drop the leading dollar sign without risking a digit if it is absent.
    price = price_tag.get_text().strip().lstrip('$')

    # `datetime` is imported at the top of the notebook.
    today = datetime.date.today()

    header = ['Title', 'price', 'Date']
    data = [title, price, today]

    # Append rather than truncate: opening with 'w' here would leave only the
    # most recent observation in the file after every call.
    needs_header = not os.path.isfile(csv_path) or os.path.getsize(csv_path) == 0
    with open(csv_path, 'a', newline='', encoding='UTF8') as f:
        writer = csv.writer(f)
        if needs_header:
            writer.writerow(header)
        writer.writerow(data)
    
In [9]:
# Uncomment the loop below to poll the price: it calls check_price(), waits 5 seconds, and repeats until the kernel is interrupted

# while(True):
#     check_price()
#     time.sleep(5)
In [10]:
import pandas as pd

# Re-read the CSV from the same relative path it is written to, rather than an
# absolute path under one user's home directory that exists on one machine only.
df = pd.read_csv('AmazonWebScraperDataset.csv')

print(df)
                                               Title  price        Date
0  I Got Your Back Stick Figure Friendship Novelt...  16.99  2021-09-14
1  I Got Your Back Stick Figure Friendship Novelt...  16.99  2021-09-14
In [ ]: